<?php
namespace WDB\GTO;
use WDB,
    WDB\Exception;
/**
 * Query parser. Uses WDB SQL dialect.
 *
 * @author Richard Ejem <richard(at)ejem.cz>
 * @package WDB
 * @method object parse(string $source) parse source string into an object
 * @method static object parse(string|iLanguage $language, string $source) parse source string into an object
 */
final class Parser {

    /**@var iLanguage language definition driving the tokenizer and the LL parser*/
    private $language;

    /**@var Grammar (not read or written by any method of this class)*/
    private $g;
    /**@var Token[] list of parsed tokens (not read or written by any method of this class)*/
    private $t;
    /**@var int current token index (not read or written by any method of this class)*/
    private $i;

    /**@var Parser[] parser instances created by the static entry point, keyed by language class name*/
    private static $parsers = array();

    /**
     * Routes instance calls such as $parser->parse(...) to the private method
     * of the same name prefixed with an underscore (parse -> _parse).
     *
     * @param string $name called method name
     * @param array $arguments call arguments
     * @return mixed whatever the target method returns
     * @throws \BadMethodCallException when no "_$name" method exists
     */
    public function __call($name, $arguments) {
        $method = '_'.$name;
        if (!method_exists($this, $method)) {
            throw new \BadMethodCallException('Call to undefined method '.__CLASS__.'::'.$name.'()');
        }
        return call_user_func_array(array($this, $method), $arguments);
    }

    /**
     * Routes static calls such as Parser::parse(...) to the private static
     * method of the same name prefixed with "s_" (parse -> s_parse).
     *
     * @param string $name called method name
     * @param array $arguments call arguments
     * @return mixed whatever the target method returns
     * @throws \BadMethodCallException when no "s_$name" method exists
     */
    public static function __callStatic($name, $arguments) {
        $method = 's_'.$name;
        if (!method_exists(__CLASS__, $method)) {
            throw new \BadMethodCallException('Call to undefined method '.__CLASS__.'::'.$name.'()');
        }
        // __CLASS__ instead of the string 'self': "self" inside a callable is deprecated since PHP 8.2
        return call_user_func_array(array(__CLASS__, $method), $arguments);
    }

    /**
     * Static entry point: parses $source with a parser that is created once
     * per language class and then reused.
     *
     * The cache key is the language class name, so when instances are passed,
     * the first instance seen for a class is the one used for all later calls.
     *
     * @param string|iLanguage $language language class name or language instance
     * @param string $source text to parse
     * @param string $symbol grammar symbol to start parsing from
     * @return mixed result of the language's objectFrom<Symbol> method
     */
    private static function s_parse($language, $source, $symbol = 'Start')
    {
        $class = is_string($language) ? $language : get_class($language);
        if (!isset(self::$parsers[$class]))
        {
            self::$parsers[$class] = new self($language);
        }

        return self::$parsers[$class]->parse($source, $symbol);
    }

    /**
     * @param string|iLanguage $language language class name or language instance
     * @throws Exception\BadArgument when $language is neither a string nor an iLanguage
     */
    public function __construct($language) {
        if (is_string($language)) {
            // suppose the argument is a language class name
            // NOTE(review): the created object is not checked against iLanguage - confirm whether that is intended
            $language = new $language;
        }
        elseif (!$language instanceof iLanguage)
        {
            throw new Exception\BadArgument("language must be a class name or iLanguage instance");
        }
        $this->language = $language;
    }

    /**
     * Tokenizes and parses $source, then lets the language convert the parse
     * tree of $symbol into its object representation.
     *
     * Reached from outside through __call() as $parser->parse(...).
     *
     * @param string $source text to parse
     * @param string $symbol grammar symbol to start parsing from
     * @return mixed result of the language's objectFrom<Symbol> method
     * @throws Exception\InvalidOperation when the language has no objectFrom<Symbol> method
     */
    private function _parse($source, $symbol = 'Start')
    {
        $tokens = $this->tokenize($source);
        $tree = $this->llParse($tokens, $symbol);
        $parseMethod = 'objectFrom'.$symbol;
        if (!method_exists($this->language, $parseMethod))
        {
            throw new Exception\InvalidOperation("Language ".get_class($this->language)." cannot directly parse symbol $symbol/");
        }
        return $this->language->$parseMethod($tree[$symbol]);
    }

    /**
     * Prints the token list and the parse tree (from the default start symbol)
     * of $source to standard output.
     *
     * @param string $source text to parse
     */
    public function debug($source)
    {
        $tokens = $this->tokenize($source);
        $this->dumpTokens($tokens);
        $tree = $this->llParse($tokens);
        $this->dumpTree($tree);
    }

    /**
     * Prints one "type: content" line per token.
     *
     * @param Token[] $tokens
     */
    private function dumpTokens($tokens)
    {
        foreach ($tokens as $token)
        {
            echo $token->type.': '.$token->content."\n";
        }
    }

    /**
     * Recursively prints a parse tree, one node per line, indented by depth.
     *
     * @param array $tree parse tree as produced by llParse()
     * @param int $indent current nesting depth
     */
    private function dumpTree($tree, $indent = 0)
    {
        foreach ($tree as $key=>$val)
        {
            echo str_repeat('|  ', $indent);
            if ($val instanceof Token)
            {
                echo "$key:{$val->type}={$val->content}\n";
            }
            elseif ($val === NULL)
            {
                // NULL marks a nonterminal rewritten to nothing; printed as a UTF-8 lambda
                echo "$key:\xCE\xBB\n";
            }
            elseif (is_array($val))
            {
                echo "$key ->\n";
                $this->dumpTree($val, $indent+1);
            }
        }
    }

    /**
     * Tells whether a grammar symbol is a terminal: terminals start with
     * a lowercase letter.
     *
     * @param string $s symbol name
     * @return bool
     */
    private function is_terminal($s)
    {
        // $s[0], not $s{0}: curly-brace offsets are a parse error since PHP 8.0
        return strlen($s) > 0 && ctype_lower($s[0]);
    }

    /**
     * Translates a symbol through the grammar's alias table.
     *
     * @param string $symbol symbol name, possibly an alias
     * @return string aliased symbol, or $symbol itself when no alias is defined
     */
    private function resolveAlias($symbol)
    {
        $aliases = $this->language->getGrammar()->aliases;
        if (isset($aliases[$symbol])) return $aliases[$symbol];
        return $symbol;
    }

    /**
     * Table-driven LL parser: consumes $tokens and builds a nested array tree.
     *
     * Tree shape: terminals are stored as Token objects under their (alias)
     * symbol name; nonterminals are stored as nested arrays, or as NULL when
     * rewritten to nothing. A nonterminal name that repeats inside one level
     * gets a numeric suffix (Name, Name0, Name1, ...).
     *
     * @param Token[] $tokens tokens to consume
     * @param string $symbol start symbol
     * @return array parse tree with a single top-level key $symbol
     * @throws Exception\ParseError on a token the grammar does not allow
     */
    private function llParse(array $tokens, $symbol = 'Start')
    {
        $g = $this->language->getGrammar();
        $stack = array($symbol);
        $ptree = array();
        // $stackLevels[0] is always a reference to the tree node currently being filled,
        // $stackLevelSizes[0] the number of its children that are still to be processed
        $stackLevels = array(&$ptree);
        $stackLevelSizes = array(1);
        while (count($stack) > 0)
        {
            $hasToken = count($tokens) > 0;
            $t = $hasToken ? $tokens[0]->type : '$END';
            // only read the look-ahead content when a token exists; at end of input it is empty
            $content = $hasToken ? $tokens[0]->content : '';
            $stackTop = $this->resolveAlias($stack[0]);
            $stackTopAlias = $stack[0];
            $reachedBottom = FALSE;
            if ($this->is_terminal($stackTop))
            {
                if ($t != $stackTop)
                {
                    throw new Exception\ParseError("Unexpected token: {$t}({$content}), expected {$stack[0]}");
                }
                else
                {
                    // NOTE(review): no numeric suffix here - a repeated terminal alias overwrites the earlier one
                    $stackLevels[0][$stackTopAlias] = array_shift($tokens); //get terminal from input
                    --$stackLevelSizes[0];

                    $reachedBottom = TRUE;
                    array_shift($stack);  //pop terminal from stack
                }
            }
            else
            {
                $rule = NULL;
                if (!isset($g->parsingTable[$stackTop]) || !isset($g->parsingTable[$stackTop][$t]))
                {
                    // no table entry for this look-ahead: fall back to the symbol's lambda rule, if any
                    if (isset($g->lambdaRules[$stackTop])) {
                        $rule = $g->lambdaRules[$stackTop];
                    }
                    else {
                        $expected = isset($g->parsingTable[$stackTop]) ? array_keys($g->parsingTable[$stackTop]) : array();
                        throw new Exception\ParseError("Unexpected token: {$t}({$content}), expected one of: ".implode(', ', $expected));
                    }

                }
                --$stackLevelSizes[0];

                array_shift($stack); //pop nonterminal from stack
                if ($rule === NULL)
                {
                    $rule = $g->parsingTable[$stackTop][$t];
                }

                // pick a free key for this node inside the current tree level
                $postfix = '';
                if (isset($stackLevels[0][$stackTopAlias]))
                {
                    $postfix = 0;
                    while(isset($stackLevels[0][$stackTopAlias.$postfix])) ++$postfix;
                }
                if (count ($g->rules[$rule]->rewriteTo) > 0)
                {
                    array_unshift($stackLevelSizes, count($g->rules[$rule]->rewriteTo));

                    // descend: create the child node and make it the current level
                    $stackLevels[0][$stackTopAlias.$postfix] = array();
                    array_unshift($stackLevels, NULL);
                    $stackLevels[0] = &$stackLevels[1][$stackTopAlias.$postfix];

                    $stack = array_merge($g->rules[$rule]->rewriteTo, $stack); //push rewrite rule to stack
                }
                else
                {
                    $stackLevels[0][$stackTopAlias.$postfix] = NULL;
                    $reachedBottom = TRUE;
                }
            }
            // ascend from every tree level whose children have all been processed
            while ($reachedBottom && count($stackLevels) > 1 && $stackLevelSizes[0] == 0)
            {
                array_shift($stackLevelSizes);
                array_shift($stackLevels);
            }
        }
        if (count($tokens) > 0)
        {
            throw new Exception\ParseError("Unexpected token: {$tokens[0]->type}, expected end of input");
        }
        return $ptree;
    }

    /**
     * Splits $input into tokens.
     *
     * At each offset the language's keyword pattern is tried first (case
     * insensitive; the token type is the lowercased match with spaces replaced
     * by underscores), then the language's token patterns in their given
     * order. Tokens whose type is listed by getTossTokens() are dropped.
     *
     * The language's keywords, token patterns and toss list are fetched once
     * per call; they are assumed not to change while tokenizing.
     *
     * @param string $input text to tokenize; carriage returns are removed first
     * @return Token[]
     * @throws Exception\GtoUnknownToken when nothing matches, or a pattern matches an empty string
     */
    private function tokenize($input)
    {
        $input = str_replace("\r", '', $input);
        $length = strlen($input);
        $keywordRegex = '~(?:'.$this->language->getKeywords().')~Ai';
        $tokenPatterns = $this->language->getTokenPatterns();
        $tossTokens = $this->language->getTossTokens();
        $tokens = array();
        $offset = 0;
        while ($offset < $length)
        {
            $token = NULL;
            if (preg_match($keywordRegex, $input, $m, 0, $offset))
            {
                $token = Token::create(strtolower(str_replace(' ', '_', $m[0])), $m[0]);
            }
            else
            {
                foreach ($tokenPatterns as $tokenType=>$pattern)
                {
                    // "~" is the regex delimiter here, so it has to be escaped inside the pattern
                    if (preg_match('~'.str_replace('~', '\\~', $pattern).'~Axis', $input, $m, 0, $offset))
                    {
                        $token = Token::create($tokenType, $m[0]);
                        break;
                    }
                }
            }
            if ($token === NULL)
            {
                throw new Exception\GtoUnknownToken("Unknown token at offset $offset of input $input");
            }
            // an empty match would never advance the offset and loop forever
            if (strlen($m[0]) == 0)
            {
                throw new Exception\GtoUnknownToken("Internal tokenizer error - matched empty token at offset $offset of input $input");
            }
            $offset += strlen($m[0]);
            if (!in_array($token->type, $tossTokens, true))
            {
                $tokens[] = $token;
            }
        }
        return $tokens;
    }
}